/*******************************************************************************
* Objective: Creating Agricultural Exports Dataset
*******************************************************************************/

********************************************************************************
**# Step 1: Raw Data - Load exports datasets
/*******************************************************************************
Notes: The process is done in a loop for Total Agricultural and Forestry exports
       as well as for Total Merchandise exports. Cashcrops exports are cleaned 
	   outside of the loop because the dataset is slighly different (export 
	   values are divided by year, country AND item)
*******************************************************************************/

* Cashcrop data
import delimited "raw_datasets/cashcrops_x.csv", encoding(UTF-8) clear	

* Format for consistency with core dataset
rename area country_wup
replace country_wup = "Swaziland" if country_wup == "Eswatini"
replace country_wup = "China, Taiwan Province of China" if country_wup == "China, Taiwan Province of"

* Keep only desired 116 countries. 
* Note: We also keep former countries that were later divided: Ethiopia PDR and Sudan (Former)
merge m:1 country_wup using "processed_datasets/ccode", keepusing(country_wb)
keep if _merge == 3 | country_wup == "Sudan (former)" | country_wup == "Ethiopia PDR"
drop _merge country_wb

* Renaming variables
rename value cashcrops_x

* Take 0s as missing
replace cashcrops_x  = . if cashcrops_x == 0

* Replacing 1960 value with 1961
replace year = 1960 if year == 1961

* Replacing 2020 with 2018
replace year = 2020 if year == 2018

* Sum
collapse (sum) cashcrops_x, by(country_wup year)

* Take 0s as missing
replace cashcrops_x = . if cashcrops_x == 0

save "processed_datasets/dataset_agriexp.dta", replace

/*******************************************************************************
* We clean and add the rest of the datasets with the following loop
*******************************************************************************/

foreach dfile in hides_x meat_x agritot_x merchfao_x forest_x {
    
	* Import Data
	import delimited "raw_datasets/`dfile'.csv", encoding(UTF-8) clear
	
	* We drop 2019 for making forestry, meat and hides&skin dataset (only data with 2019) consistent with the rest.
	drop if year == 2019
	
	* For consistency with main dataset
	rename area country_wup
	replace country_wup = "Swaziland" if country_wup == "Eswatini"
	replace country_wup = "China, Taiwan Province of China" if country_wup == "China, Taiwan Province of"
	
	* Keep only desired 116 countries
	* Note: We also keep former countries that later divided themselves. Ethiopia PDR and Sudan (Former) + South Sudan for calculations.
	merge m:1 country_wup using "processed_datasets/ccode", keepusing(country_wb)
	keep if _merge == 3 | country_wup == "Sudan (former)" | country_wup == "Ethiopia PDR"
	drop _merge country_wb 
	
	* Renaming outcome variable
	rename value `dfile'
	
	* Take 0s as missing
	replace `dfile' = . if `dfile' == 0
	
	*Approximating 1960
	replace year = 1960 if year == 1961
	
	* We use 2018 as 2020
	replace year = 2020 if year == 2018
		
	* Keep only variables of interest
	keep country_wup year `dfile'
	
	* Take 0s as missings again
	replace `dfile' = . if `dfile' == 0
	
	tempfile `dfile'_temp
	save ``dfile'_temp', replace
	
	* Merging with main dataset

	use "processed_datasets/dataset_agriexp.dta", clear
	merge 1:1 country_wup year using ``dfile'_temp'
	drop _merge
	save "processed_datasets/dataset_agriexp.dta", replace
} 

********************************************************************************
**# Step 2.1: Missing Data - Changing countries
/*******************************************************************************
Some countries became independent of one-another or were divided in two during
the considered period. In those cases we modify their export values accordingly.
*******************************************************************************/

sort country_wup year

/*******************************************************************************
Ethiopia and Eritrea - Fills values for 1960 - 1992

Ethipia PDR is divided between Eritrea and Ethiopia using shares of both 
countries based on average values of 1993-1995. Except for forestry, which uses 
averages for 2007-2009 to avoid missing data.
*******************************************************************************/

foreach eri_num of numlist 2263/2294{
    foreach eri_a in cashcrops_x {
		replace `eri_a' = `eri_a'[`eri_num'+116]*0.000573 if _n == `eri_num'
	}
	foreach eri_b in agritot_x {
	    replace `eri_b' = `eri_b'[`eri_num'+116]*0.017457 if _n == `eri_num'
	}
	foreach eri_c in merchfao_x {
	    replace `eri_c' = `eri_c'[`eri_num'+116]*0.153012 if _n == `eri_num'
	}
	foreach eri_d in forest_x {
	    replace `eri_d' = `eri_d'[`eri_num'+116]*0.123154 if _n == `eri_num'
	}
	foreach eri_e in meat_x {
	    replace `eri_e' = `eri_e'[`eri_num'+116]*0 if _n == `eri_num'
	}
	foreach eri_f in hides_x {
	    replace `eri_f' = `eri_f'[`eri_num'+116]*0.095729 if _n == `eri_num'
	}	
}

foreach eti_num of numlist 2321/2352{
    foreach eti_a in cashcrops_x {
		replace `eti_a' = `eti_a'[`eti_num'+58]*(1-0.000573) if _n == `eti_num'
	}
    foreach eti_b in agritot_x {
		replace `eti_b' = `eti_b'[`eti_num'+58]*(1-0.017457) if _n == `eti_num'
	}
    foreach eti_c in merchfao_x {
		replace `eti_c' = `eti_c'[`eti_num'+58]*(1-0.153012) if _n == `eti_num'
	}
    foreach eti_d in forest_x {
		replace `eti_d' = `eti_d'[`eti_num'+58]*(1-0.123154) if _n == `eti_num'
	}
    foreach eti_e in meat_x {
		replace `eti_e' = `eti_e'[`eti_num'+58]*(1) if _n == `eti_num'
	}
    foreach eti_f in hides_x {
		replace `eti_f' = `eti_f'[`eti_num'+58]*(1-0.095729) if _n == `eti_num'
	}
}

/*******************************************************************************
Namibia and South Africa - Values for 1960-1996

South Africa is divided between Namibia and South Africa using shares of both 
countries based on average values of 1997-1999. Except for forestry, which uses 
averages for 2000-2002 to avoid missing data. This calculations only apply to
cashcrop and forestry Data.
*******************************************************************************/

foreach nam_num of numlist 4467/4502{
    foreach nam_a in cashcrops_x {
		replace `nam_a' = `nam_a'[`nam_num'+1218]*0.023710 if _n == `nam_num'
	}
    foreach nam_b in forest_x {
		replace `nam_b' = `nam_b'[`nam_num'+1218]*0.001639 if _n == `nam_num'
	}
}
	
foreach sa_num of numlist 	5685/5720{
    foreach sa_a in cashcrops_x {
		replace `sa_a' = `sa_a'[`sa_num'] - `sa_a'[`sa_num'-1218] if _n == `sa_num'
	}
    foreach sa_b in forest_x {
		replace `sa_b' = `sa_b'[`sa_num'] - `sa_b'[`sa_num'-1218] if _n == `sa_num'
	}
}


/*******************************************************************************
Sudan

Merges the three Sudans (Sudan (fomer) before 2012 and Sudan and South Sudan 
after 2012) in one.
*******************************************************************************/


foreach su_x in cashcrops_x hides_x meat_x agritot_x merchfao_x forest_x {
    replace `su_x' = 0 if country_wup == "South Sudan" & `su_x' == . | country_wup == "Sudan" & `su_x' == . | country_wup == "Sudan (former)" & `su_x' == .
}

foreach su_num of numlist 	5859/5916{
    foreach su_a in cashcrops_x {
		replace `su_a' = `su_a'[`su_num'] + `su_a'[`su_num'+58] + `su_a'[`su_num'-116]  if _n == `su_num'
	}
    foreach su_b in agritot_x {
		replace `su_b' = `su_b'[`su_num'] + `su_b'[`su_num'+58] + `su_b'[`su_num'-116]  if _n == `su_num'
	}
    foreach su_c in merchfao_x {
		replace `su_c' = `su_c'[`su_num'] + `su_c'[`su_num'+58] + `su_c'[`su_num'-116]  if _n == `su_num'
	}
    foreach su_d in forest_x {
		replace `su_d' = `su_d'[`su_num'] + `su_d'[`su_num'+58] + `su_d'[`su_num'-116]  if _n == `su_num'
	}
    foreach su_e in meat_x {
		replace `su_e' = `su_e'[`su_num'] + `su_e'[`su_num'+58] + `su_e'[`su_num'-116]  if _n == `su_num'
	}
    foreach su_f in hides_x {
		replace `su_f' = `su_f'[`su_num'] + `su_f'[`su_num'+58] + `su_f'[`su_num'-116]  if _n == `su_num'
	}
}

foreach su_y in cashcrops_x hides_x meat_x agritot_x merchfao_x forest_x {
    replace `su_y' = . if country_wup == "Sudan" & `su_y' == 0
}


********************************************************************************
**# Step 2.2: Missing Data - Cashcrop Exports - Case-by-case
********************************************************************************

sort country_wup year

* Fill with interpolation
by country_wup: ipolate cashcrops_x year, gen(intcash)
replace cashcrops_x = intcash if country_wup == "Algeria" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Bahamas" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Cabo Verde" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Eritrea" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Guinea-Bissau" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Iraq" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Libya" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Somalia" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Suriname" & cashcrops_x == .
replace cashcrops_x = intcash if country_wup == "Saudi Arabia" & cashcrops_x == .

* Special Cases *

* Bahamas: Assume 0 before 1969 as can't be interpolated
replace cashcrops_x = 0 if country_wup == "Bahamas" & cashcrops_x == .

* Bahrain: Assume 0 before 1967 as can't be interpolated
replace cashcrops_x = 0 if country_wup == "Bahrain" & cashcrops_x == .

* Cabo Verde: Assume 0 after 2016 as can't be interpolated
replace cashcrops_x = 0 if country_wup == "Cabo Verde" & cashcrops_x == .

* Djibouti: Assume 0 when missing
replace cashcrops_x = 0 if country_wup == "Djibouti" & cashcrops_x == .

* Mauritania: Assume 0 when missing
replace cashcrops_x = 0 if country_wup == "Mauritania" & cashcrops_x == .

* Oman: Assume 0 when missing
replace cashcrops_x = 0 if country_wup == "Oman" & cashcrops_x == .

* Qatar: Assume 0 before 1972 as can't be interpolated
*        Drop value for 1987 (Typo)
*        Interpolate between 1975 and 1990 (recalculate interpolation variable)

replace cashcrops_x = 0 if country_wup == "Qatar" & cashcrops_x == . & year<1972
replace cashcrops_x = . if country_wup == "Qatar" & year == 1987
drop intcash
by country_wup: ipolate cashcrops_x year, gen(intcash)
replace cashcrops_x = intcash if country_wup == "Qatar" & cashcrops_x == .

* UAE: Assume 0 when missing
replace cashcrops_x = 0 if country_wup == "United Arab Emirates" & cashcrops_x == .

drop intcash

********************************************************************************
**# Step 2.3: Missing Data - Hides and Skins Exports - Case-by-case
********************************************************************************

* For Countries with missing data we use interpolation
sort country_wup year
by country_wup: ipolate hides_x year, gen(inthides)

* Special Countries 1 * - Interpolation when possible, 0 for extreme (early 1960s, late 2010s) years without data
foreach co in Algeria Angola Bahrain Bangladesh Belize Benin Bhutan Cambodia Cameroon Chad Colombia Congo Cuba Djibouti Ecuador Egypt Eritrea Ethiopia Fiji Gambia Ghana Guatemala Guinea Guyana Haiti India Iraq Jamaica Kuwait Lebanon Lesotho Liberia Libya Madagascar Mali Mauritania Mauritius Mexico Mongolia Morocco Mozambique Myanmar Namibia Nepal Nicaragua Niger Oman Pakistan Panama Paraguay Philippines Qatar Rwanda Suriname Swaziland Thailand Togo Tunisia Uganda Zambia Zimbabwe{
    replace hides_x = inthides if country_wup == "`co'" & hides_x == .
	replace hides_x = 0 if country_wup == "`co'" & hides_x == .
}

replace hides_x = inthides if country_wup == "Brunei Darussalam" & hides_x == .
replace hides_x = 0 if country_wup == "Brunei Darussalam" & hides_x == .

replace hides_x = inthides if country_wup == "Burkina Faso" & hides_x == .
replace hides_x = 0 if country_wup == "Burkina Faso" & hides_x == .

replace hides_x = inthides if country_wup == "Cabo Verde" & hides_x == .
replace hides_x = 0 if country_wup == "Cabo Verde" & hides_x == .

replace hides_x = inthides if country_wup == "Central African Republic" & hides_x == .
replace hides_x = 0 if country_wup == "Central African Republic" & hides_x == .

replace hides_x = inthides if country_wup == "China, Macao SAR" & hides_x == .
replace hides_x = 0 if country_wup == "China, Macao SAR" & hides_x == .

replace hides_x = inthides if country_wup == "China, Taiwan Province of China" & hides_x == .
replace hides_x = 0 if country_wup == "China, Taiwan Province of China" & hides_x == .

replace hides_x = inthides if country_wup == "Costa Rica" & hides_x == .
replace hides_x = 0 if country_wup == "Costa Rica" & hides_x == .

replace hides_x = inthides if country_wup == "Côte d'Ivoire" & hides_x == .
replace hides_x = 0 if country_wup == "Côte d'Ivoire" & hides_x == .

replace hides_x = inthides if country_wup == "Democratic Republic of the Congo" & hides_x == .
replace hides_x = 0 if country_wup == "Democratic Republic of the Congo" & hides_x == .

replace hides_x = inthides if country_wup == "Dominican Republic" & hides_x == .
replace hides_x = 0 if country_wup == "Dominican Republic" & hides_x == .

replace hides_x = inthides if country_wup == "El Salvador" & hides_x == .
replace hides_x = 0 if country_wup == "El Salvador" & hides_x == .

replace hides_x = inthides if country_wup == "Guinea-Bissau" & hides_x == .
replace hides_x = 0 if country_wup == "Guinea-Bissau" & hides_x == .

replace hides_x = inthides if country_wup == "Lao People's Democratic Republic" & hides_x == .
replace hides_x = 0 if country_wup == "Lao People's Democratic Republic" & hides_x == .

replace hides_x = inthides if country_wup == "Papua New Guinea" & hides_x == .
replace hides_x = 0 if country_wup == "Papua New Guinea" & hides_x == .

replace hides_x = inthides if country_wup == "Republic of Korea" & hides_x == .
replace hides_x = 0 if country_wup == "Republic of Korea" & hides_x == .

replace hides_x = inthides if country_wup == "Sri Lanka" & hides_x == .
replace hides_x = 0 if country_wup == "Sri Lanka" & hides_x == .

replace hides_x = inthides if country_wup == "Syrian Arab Republic" & hides_x == .
replace hides_x = 0 if country_wup == "Syrian Arab Republic" & hides_x == .

replace hides_x = inthides if country_wup == "Trinidad and Tobago" & hides_x == .
replace hides_x = 0 if country_wup == "Trinidad and Tobago" & hides_x == .

replace hides_x = inthides if country_wup == "United Arab Emirates" & hides_x == .
replace hides_x = 0 if country_wup == "United Arab Emirates" & hides_x == .

replace hides_x = inthides if country_wup == "Venezuela (Bolivarian Republic of)" & hides_x == .
replace hides_x = 0 if country_wup == "Venezuela (Bolivarian Republic of)" & hides_x == .

replace hides_x = inthides if country_wup == "Viet Nam" & hides_x == .
replace hides_x = 0 if country_wup == "Viet Nam" & hides_x == .

* Other Countries * 

* Replace with 0 when missing
replace hides_x = 0 if country_wup == "Bahamas" & hides_x == .
replace hides_x = 0 if country_wup == "Comoros" & hides_x == .
replace hides_x = 0 if country_wup == "Equatorial Guinea" & hides_x == .
replace hides_x = 0 if country_wup == "Gabon" & hides_x == .
replace hides_x = 0 if country_wup == "Sierra Leone" & hides_x == .
replace hides_x = 0 if country_wup == "Solomon Islands" & hides_x == .

* Botswana and Mongolia don't have data for early years, but exports of hides 
* seem relevant, hence we replace with 1 in 1960 and take interpolation

drop inthides
replace hides_x = 1 if country_wup == "Botswana" & year == 1960
replace hides_x = 1 if country_wup == "Mongolia" & year == 1960
by country_wup: ipolate hides_x year, gen(inthides)
replace hides_x = inthides if country_wup == "Botswana" & hides_x == .
replace hides_x = inthides if country_wup == "Mongolia" & hides_x == .

drop inthides

********************************************************************************
**# Step 2.4: Missing Data - Meat Exports - Case-by-case
********************************************************************************

* For Countries with missing data we use interpolation
sort country_wup year
by country_wup: ipolate meat_x year, gen(intmeat)

* Special Countries 1 * - Interpolation when possible, 0 for extreme (early 1960s, lat 2010s) years without data
foreach co2 in Afghanistan Algeria Angola Bahamas Bahrain Bangladesh Belize Benin Bhutan Burundi Cambodia Cameroon Chad Chile Colombia Comoros Congo Cuba Djibouti Ecuador Eritrea Gabon Gambia Ghana Guinea Guyana Haiti Indonesia Iraq Jordan Kuwait Lebanon Lesotho Liberia Libya Malawi Mali Mauritania Mauritius Mozambique Myanmar Nepal Niger Nigeria Oman Pakistan Panama Peru Philippines Qatar Rwanda Somalia Sudan Suriname Swaziland Thailand Togo Uganda Yemen Zambia{
    replace meat_x = intmeat if country_wup == "`co2'" & meat_x == .
	replace meat_x = 0 if country_wup == "`co2'" & meat_x == .
}

replace meat_x = intmeat if country_wup == "Bolivia (Plurinational State of)" & meat_x == .
replace meat_x = 0 if country_wup == "Bolivia (Plurinational State of)" & meat_x == .

replace meat_x = intmeat if country_wup == "Brunei Darussalam" & meat_x == .
replace meat_x = 0 if country_wup == "Brunei Darussalam" & meat_x == .

replace meat_x = intmeat if country_wup == "Burkina Faso" & meat_x == .
replace meat_x = 0 if country_wup == "Burkina Faso" & meat_x == .

replace meat_x = intmeat if country_wup == "Cabo Verde" & meat_x == .
replace meat_x = 0 if country_wup == "Cabo Verde" & meat_x == .

replace meat_x = intmeat if country_wup == "Central African Republic" & meat_x == .
replace meat_x = 0 if country_wup == "Central African Republic" & meat_x == .

replace meat_x = intmeat if country_wup == "China, Macao SAR" & meat_x == .
replace meat_x = 0 if country_wup == "China, Macao SAR" & meat_x == .

replace meat_x = intmeat if country_wup == "Côte d'Ivoire" & meat_x == .
replace meat_x = 0 if country_wup == "Côte d'Ivoire" & meat_x == .

replace meat_x = intmeat if country_wup == "Democratic Republic of the Congo" & meat_x == .
replace meat_x = 0 if country_wup == "Democratic Republic of the Congo" & meat_x == .

replace meat_x = intmeat if country_wup == "Dominican Republic" & meat_x == .
replace meat_x = 0 if country_wup == "Dominican Republic" & meat_x == .

replace meat_x = intmeat if country_wup == "Equatorial Guinea" & meat_x == .
replace meat_x = 0 if country_wup == "Equatorial Guinea" & meat_x == .

replace meat_x = intmeat if country_wup == "Guinea-Bissau" & meat_x == .
replace meat_x = 0 if country_wup == "Guinea-Bissau" & meat_x == .

replace meat_x = intmeat if country_wup == "Iran (Islamic Republic of)" & meat_x == .
replace meat_x = 0 if country_wup == "Iran (Islamic Republic of)" & meat_x == .

replace meat_x = intmeat if country_wup == "Lao People's Democratic Republic" & meat_x == .
replace meat_x = 0 if country_wup == "Lao People's Democratic Republic" & meat_x == .

replace meat_x = intmeat if country_wup == "Papua New Guinea" & meat_x == .
replace meat_x = 0 if country_wup == "Papua New Guinea" & meat_x == .

replace meat_x = intmeat if country_wup == "Republic of Korea" & meat_x == .
replace meat_x = 0 if country_wup == "Republic of Korea" & meat_x == .

replace meat_x = intmeat if country_wup == "Saudi Arabia" & meat_x == .
replace meat_x = 0 if country_wup == "Saudi Arabia" & meat_x == .

replace meat_x = intmeat if country_wup == "Sierra Leone" & meat_x == .
replace meat_x = 0 if country_wup == "Sierra Leone" & meat_x == .

replace meat_x = intmeat if country_wup == "Solomon Islands" & meat_x == .
replace meat_x = 0 if country_wup == "Solomon Islands" & meat_x == .

replace meat_x = intmeat if country_wup == "Sri Lanka" & meat_x == .
replace meat_x = 0 if country_wup == "Sri Lanka" & meat_x == .

replace meat_x = intmeat if country_wup == "Syrian Arab Republic" & meat_x == .
replace meat_x = 0 if country_wup == "Syrian Arab Republic" & meat_x == .

replace meat_x = intmeat if country_wup == "United Arab Emirates" & meat_x == .
replace meat_x = 0 if country_wup == "United Arab Emirates" & meat_x == .

replace meat_x = intmeat if country_wup == "United Republic of Tanzania" & meat_x == .
replace meat_x = 0 if country_wup == "United Republic of Tanzania" & meat_x == .

replace meat_x = intmeat if country_wup == "Venezuela (Bolivarian Republic of)" & meat_x == .
replace meat_x = 0 if country_wup == "Venezuela (Bolivarian Republic of)" & meat_x == .

replace meat_x = intmeat if country_wup == "Viet Nam" & meat_x == .
replace meat_x = 0 if country_wup == "Viet Nam" & meat_x == .

drop intmeat

********************************************************************************
**# Step 2.5: Missing Data - Total agricultural exports - Case-by-case
********************************************************************************

* Djibouti: Assume missings are 0s
replace agritot_x = 0 if country_wup == "Djibouti" & agritot_x == .

* UAE: Assume missings are 0s
replace agritot_x = 0 if country_wup == "United Arab Emirates" & agritot_x == .

* Qatar: Assume 0 before 1972, as can't be interpolated.
*        Drop value for 1987 (Typo)
*        Interpolate between 1975 and 1990 (Calculate interpolation variable)

replace agritot_x = 0 if country_wup == "Qatar" & agritot_x == . & year<1972
replace agritot_x = . if country_wup == "Qatar" & year == 1987
by country_wup: ipolate agritot_x year, gen(intagri)
replace agritot_x = intagri if country_wup == "Qatar" & agritot_x == .

* Libya: Use interpolation after eliminating observations that are too low
drop intagri
replace agritot_x = . if country_wup == "Libya" & year == 1978
replace agritot_x = . if country_wup == "Libya" & year == 1979
replace agritot_x = . if country_wup == "Libya" & year == 1981
by country_wup: ipolate agritot_x year, gen(intagri)
replace agritot_x = intagri if country_wup == "Libya" & agritot_x == .

drop intagri

********************************************************************************
**# Step 2.6: Missing Data - Total merchandise exports - Case-by-case
********************************************************************************

* Somalia: Replace 2020 missing with 2017 and replace 2017 with lin. interpolation
replace merchfao_x = merchfao_x[_n-1] if country_wup == "Somalia" & year == 2020
replace merchfao_x = 510789.5 if country_wup == "Somalia" & year == 2017

* UAE: Replace missing with WDI data
replace merchfao_x = 2000 if year == 1960 & country_wup == "United Arab Emirates"
replace merchfao_x = 10000 if year == 1962 & country_wup == "United Arab Emirates"
replace merchfao_x = 35000 if year == 1963 & country_wup == "United Arab Emirates"
replace merchfao_x = 125000 if year == 1964 & country_wup == "United Arab Emirates"
replace merchfao_x = 190000 if year == 1965 & country_wup == "United Arab Emirates"

* Djibouti: Replace missing with WDI data
replace merchfao_x = 22000 if year == 1960 & country_wup == "Djibouti"
replace merchfao_x = 25000 if year == 1962 & country_wup == "Djibouti"
replace merchfao_x = 22000 if year == 1963 & country_wup == "Djibouti"
replace merchfao_x = 24000 if year == 1964 & country_wup == "Djibouti"
replace merchfao_x = 28000 if year == 1965 & country_wup == "Djibouti"

********************************************************************************
**# Step 2.7: Missing Data - Forest Exports - Case-by-case
/*******************************************************************************
For some countries we assume export of forest products are 0. This is the case
when the countries can't be interpolated or the values of exports that aren't 
missing are too low and missing variables can be assumed to be indeed 0.

For other countries we use linear interpolation. We do this for countries with
missing variables but observable exports that are large enough to consider
missings points real misssings and not just 0s.
*******************************************************************************/

replace forest_x = 0 if country_wup == "Afghanistan" & forest_x == .
replace forest_x = 0 if country_wup == "Bahrain" & forest_x == .
replace forest_x = 0 if country_wup == "Bangladesh" & forest_x == .
replace forest_x = 0 if country_wup == "Benin" & forest_x == .
replace forest_x = 0 if country_wup == "Bhutan" & forest_x == .
replace forest_x = 0 if country_wup == "Bolivia (Plurinational State of)" & forest_x == .
replace forest_x = 0 if country_wup == "Botswana" & forest_x == .
replace forest_x = 0 if country_wup == "Brunei Darussalam" & forest_x == .
replace forest_x = 0 if country_wup == "Burkina Faso" & forest_x == .
replace forest_x = 0 if country_wup == "Burundi" & forest_x == .
replace forest_x = 0 if country_wup == "Cabo Verde" & forest_x == .
replace forest_x = 0 if country_wup == "Chad" & forest_x == .
replace forest_x = 0 if country_wup == "China, Macao SAR" & forest_x == .
replace forest_x = 0 if country_wup == "Comoros" & forest_x == .
replace forest_x = 0 if country_wup == "Cuba" & forest_x == .
replace forest_x = 0 if country_wup == "Djibouti" & forest_x == .
replace forest_x = 0 if country_wup == "Egypt" & forest_x == .
replace forest_x = 0 if country_wup == "Gambia" & forest_x == .
replace forest_x = 0 if country_wup == "Guinea" & forest_x == . & year != 1980
replace forest_x = 0 if country_wup == "Haiti" & forest_x == .
replace forest_x = 0 if country_wup == "Iraq" & forest_x == .
replace forest_x = 0 if country_wup == "Jamaica" & forest_x == .
replace forest_x = 0 if country_wup == "Jordan" & forest_x == .
replace forest_x = 0 if country_wup == "Kuwait" & forest_x == .
replace forest_x = 0 if country_wup == "Lesotho" & forest_x == .
replace forest_x = 0 if country_wup == "Libya" & forest_x == .
replace forest_x = 0 if country_wup == "Malawi" & forest_x == . & year < 1964
replace forest_x = 0 if country_wup == "Mali" & forest_x == .
replace forest_x = 0 if country_wup == "Mauritania" & forest_x == .
replace forest_x = 0 if country_wup == "Mauritius" & forest_x == .
replace forest_x = 0 if country_wup == "Mongolia" & forest_x == . & year < 1972
replace forest_x = 0 if country_wup == "Niger" & forest_x == .
replace forest_x = 0 if country_wup == "Oman" & forest_x == . & year < 1992
replace forest_x = 0 if country_wup == "Qatar" & forest_x == .
replace forest_x = 0 if country_wup == "Rwanda" & forest_x == . & year < 1991
replace forest_x = 0 if country_wup == "Saudi Arabia" & forest_x == .
replace forest_x = 0 if country_wup == "Senegal" & forest_x == .
replace forest_x = 0 if country_wup == "Sierra Leone" & forest_x == .
replace forest_x = 0 if country_wup == "Sudan" & forest_x == .
replace forest_x = 0 if country_wup == "Togo" & forest_x == .
replace forest_x = 0 if country_wup == "Tunisia" & forest_x == .
replace forest_x = 0 if country_wup == "United Arab Emirates" & forest_x == .
replace forest_x = 0 if country_wup == "Uruguay" & forest_x == .
replace forest_x = 0 if country_wup == "Viet Nam" & forest_x == .
replace forest_x = 0 if country_wup == "Zambia" & forest_x == .

* Replace with linear interporlation
by country_wup: ipolate forest_x year, gen(intforest)
replace forest_x = intforest if country_wup == "Algeria" & forest_x == .
replace forest_x = intforest if country_wup == "Angola" & forest_x == .
replace forest_x = intforest if country_wup == "Bahamas" & forest_x == .
replace forest_x = intforest if country_wup == "Dominican Republic" & forest_x == .
replace forest_x = intforest if country_wup == "Equatorial Guinea" & forest_x == .
replace forest_x = intforest if country_wup == "Eritrea" & forest_x == .
replace forest_x = intforest if country_wup == "Ethiopia" & forest_x == .
replace forest_x = intforest if country_wup == "Guinea" & forest_x == .
replace forest_x = intforest if country_wup == "Iran (Islamic Republic of)" & forest_x == .
replace forest_x = intforest if country_wup == "Malawi" & forest_x == .
replace forest_x = intforest if country_wup == "Mongolia" & forest_x == .
replace forest_x = intforest if country_wup == "Namibia" & forest_x == .
replace forest_x = intforest if country_wup == "Oman" & forest_x == .
replace forest_x = intforest if country_wup == "Pakistan" & forest_x == .
replace forest_x = intforest if country_wup == "Panama" & forest_x == .
replace forest_x = intforest if country_wup == "Rwanda" & forest_x == .
replace forest_x = intforest if country_wup == "Somalia" & forest_x == .
replace forest_x = intforest if country_wup == "Sri Lanka" & forest_x == .
replace forest_x = intforest if country_wup == "Uganda" & forest_x == .
replace forest_x = intforest if country_wup == "Venezuela (Bolivarian Republic of)" & forest_x == .
replace forest_x = intforest if country_wup == "Yemen" & forest_x == .
replace forest_x = intforest if country_wup == "Zimbabwe" & forest_x == .
drop intforest

* Special Case: Liberia
* Values for forest for some years seem to be multiplied by 10 (typo) so we fix
* it. We also interpolate for the missing variables

replace forest_x = 21385.1 if country_wup == "Liberia" & year == 2000
replace forest_x = 22833.6 if country_wup == "Liberia" & year == 2001
replace forest_x = 33349 if country_wup == "Liberia" & year == 2002
replace forest_x = 17805.6 if country_wup == "Liberia" & year == 2003

by country_wup: ipolate forest_x year, gen(intforest)
replace forest_x = intforest if country_wup == "Liberia" & forest_x == .

drop intforest

/*******************************************************************************
**# Step 3: Finalize Data
*******************************************************************************/

* Drop countries we do not need
drop if country_wup == "Sudan (former)"
drop if country_wup == "South Sudan"
drop if country_wup == "Ethiopia PDR"

/*******************************************************************************
Generating moving averages
*******************************************************************************/

* Sorting
sort country_wup year

* Moving Average (-1/+1)
foreach dvar1 in cashcrops_x hides_x meat_x agritot_x merchfao_x forest_x {
    * Generating Variable ma1
	gen `dvar1'_ma1 = 0
	* Estimating ma1 for the sample except 1960 and 2020
	foreach ma1 of numlist 2/57{
		by country_wup: replace `dvar1'_ma1 = (`dvar1'[`ma1'-1] + `dvar1'[`ma1'] + `dvar1'[`ma1'+1])/3 if _n == `ma1' 
	}
	* Estimating ma1 for 1960 and 2020
	by country_wup: replace `dvar1'_ma1 = (`dvar1'[1] + `dvar1'[2])/2 if _n == 1
	by country_wup: replace `dvar1'_ma1 = (`dvar1'[58] + `dvar1'[57])/2 if _n == 58
}

* Taking moving averages with a window of 2
foreach dvar2 in cashcrops_x hides_x meat_x agritot_x merchfao_x forest_x {
    * Generating Variable ma2
	gen `dvar2'_ma2 = 0
	foreach ma2 of numlist 3/56{
		by country_wup: replace `dvar2'_ma2 = ///
		(`dvar2'[`ma2'-2] + `dvar2'[`ma2'-1] + `dvar2'[`ma2'] + `dvar2'[`ma2'+1] + `dvar2'[`ma2'+2])/5 ///
		if _n == `ma2' 
}
	* Estimating ma2 for 1960, 1962, 2017 and 2020
	by country_wup: replace `dvar2'_ma2 = (`dvar2'[1] + `dvar2'[2] + `dvar2'[3])/3 if _n == 1
	by country_wup: replace `dvar2'_ma2 = (`dvar2'[1] + `dvar2'[2] + `dvar2'[3] + `dvar2'[4])/4 if _n == 2
	by country_wup: replace `dvar2'_ma2 = (`dvar2'[55] + `dvar2'[56] + `dvar2'[57] + `dvar2'[58])/4 if _n == 57
	by country_wup: replace `dvar2'_ma2 = (`dvar2'[56] + `dvar2'[57] + `dvar2'[58])/3 if _n == 58
}

/*******************************************************************************
Calculate Shares and Label variables
*******************************************************************************/

* Calculating shares

* Total agricultural + foresty exports
egen agri_x = rowtotal(agritot_x forest_x)
egen agri_x_ma1 = rowtotal(agritot_x_ma1 forest_x_ma1)
egen agri_x_ma2 = rowtotal(agritot_x_ma2 forest_x_ma2)

la var agri_x "Total X of agri. and forestry goods"
la var agri_x_ma1 "Total X of agri. and forestry goods (MA1)"
la var agri_x_ma2 "Total X of agri. and forestry goods (MA2)"

* Share of agricultural + foresty exports in merchandise X
gen agri_sh_x = agri_x/merchfao_x
gen agri_sh_x_ma1 = agri_x_ma1/merchfao_x_ma1
gen agri_sh_x_ma2 = agri_x_ma2/merchfao_x_ma2

la var agri_sh_x "Share of agri. and forestry X in merchandise X"
la var agri_sh_x_ma1 "Share of agri. and forestry X in merchandise X (MA1)"
la var agri_sh_x_ma2 "Share of agri. and forestry X in merchandise X (MA2)"

* Share of cashcrops and meat and hides in merchandise X
gen cash_sh_x = (cashcrops_x + meat_x + hides_x)/merchfao_x
gen cash_sh_x_ma1 = (cashcrops_x_ma1 + meat_x_ma1 + hides_x_ma1)/merchfao_x_ma1
gen cash_sh_x_ma2 = (cashcrops_x_ma2 + meat_x_ma2 + hides_x_ma2)/merchfao_x_ma2

la var cash_sh_x "Share of cashcrops (No Bananas) + meat and hides X in merchandise X"
la var cash_sh_x_ma1 "Share of cashcrops (No Bananas) + meat and hides X in merchandise X (MA1)"
la var cash_sh_x_ma2 "Share of cashcrops (No Bananas) + meat and hides X in merchandise X (MA2)"

* Share of cashcrops and meat and hides in agricultural + foresty exports
gen cash_sh_agri = (cashcrops_x + meat_x + hides_x)/agri_x
gen cash_sh_agri_ma1 = (cashcrops_x_ma1 + meat_x_ma1 + hides_x_ma1)/agri_x_ma1
gen cash_sh_agri_ma2 = (cashcrops_x_ma2 + meat_x_ma2 + hides_x_ma2)/agri_x_ma2

la var cash_sh_agri "Share of cashcrops (No Bananas) + meat and hides X in agri. and forestry X"
la var cash_sh_agri_ma1 "Share of cashcrops (No Bananas) + meat and hides X in agri. and forestry X (MA1)"
la var cash_sh_agri_ma2 "Share of cashcrops (No Bananas) + meat and hides X in agri. and forestry X (MA2)"


* Other Labels
la var cashcrops_x "Export of Cashcrops (No Bananas) + Meat and Hides"
la var agritot_x "Export of agricultural products"
la var merchfao_x "Value of Merchandise X"
la var forest_x "Value of forest products X"
la var cashcrops_x_ma1 "Export of Cashcrops (No Bananas) + Meat and Hides (MA1)"
la var cashcrops_x_ma2 "Export of Cashcrops (No Bananas) + Meat and Hides (MA2)"
la var agritot_x_ma1 "Export of agricultural products (MA1)"
la var agritot_x_ma2 "Export of agricultural products (MA2)"
la var merchfao_x_ma1 "Value of Merchandise X (MA1)"
la var merchfao_x_ma2 "Value of Merchandise X (MA2)"
la var forest_x_ma1 "Value of forest products X (MA1)"
la var forest_x_ma2 "Value of forest products X (MA1)"

/*******************************************************************************
Change shares > 1 as = 1 and express in percentage
*******************************************************************************/

foreach v of varlist agri_sh_x agri_sh_x_ma1 agri_sh_x_ma2 cash_sh_x cash_sh_x_ma1 cash_sh_x_ma2 cash_sh_agri cash_sh_agri_ma1 cash_sh_agri_ma2{
	replace `v' = 1 if `v'>1 & `v' != .
}

foreach v of varlist agri_sh_x agri_sh_x_ma1 agri_sh_x_ma2 cash_sh_x cash_sh_x_ma1 cash_sh_x_ma2 cash_sh_agri cash_sh_agri_ma1 cash_sh_agri_ma2{
	replace `v' = `v'*100
}

/*******************************************************************************
Finalize Dataset
*******************************************************************************/

* Keep desired variables
keep country_wup year agri_sh_x agri_sh_x_ma1 agri_sh_x_ma2 cash_sh_x cash_sh_x_ma1 cash_sh_x_ma2 cash_sh_agri cash_sh_agri_ma1 cash_sh_agri_ma2 merchfao_x

* Keep desired countries
merge m:1 country_wup using "processed_datasets/ccode", keepusing(code_wb)
keep if _merge == 3
drop _merge

drop country_wup

* Save
save "processed_datasets/dataset_agriexp.dta", replace
